perm filename REV[1,BGB] blob sn#143248 filedate 1975-01-31 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00014 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00003 00002	DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT
C00006 00003	⊂CONTENTS⊃{JAFA}
C00009 00004	⊂TITLE	A1 - Bit Serial using EXCH - 183 cycles. 8 words.⊃
C00012 00005	⊂TITLE B1 - Triple Tricky with computed mask - 119 cycles. 11 words.⊃
C00014 00006	⊂TITLE B4 - Triple Tricky 12-bytes of 3-bits  - 70 cycles. 11 words.⊃
C00015 00007	⊂TITLE C1 - Impossible Table Lookup 1-Byte  of 36-bits - 2 cycles. 2↑36+2 words.⊃
C00017 00008	⊂ TITLE C5 - Table lookup 5-bytes of 7-bits - 25 cycles. 139 words.⊃
C00019 00009	⊂TITLE C9 - Table lookup 9-bytes of 4-bits  - 40 cycles. 26 words.⊃
C00022 00010	⊂TITLE D1 - Nested byte swapping -  21 cycles. 25 words.⊃
C00024 00011	⊂TITLE D3 - Nested byte swapping -  22 cycles. 32 words.⊃
C00025 00012	⊂ TITLE E1 - Schroeppel Reverse 5-Bytes of 7-bits - 40 cycles. 18 words.⊃
C00030 00013	⊂ TITLE E3 - Schroeppel 8 bit reverse in line - 44 Cycles. 36 words. ⊃
C00033 00014	⊂ TITLE E3 - Schroeppel 8 bit reverse in line - 46 Cycles. 18 words. ⊃
C00035 ENDMK
C⊗;
DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT - DRAFT

{⊂C;P1;JCFD}  PDP-10 WORD BIT REVERSAL ROUTINES.

{JC;FC}  Bruce G. Baumgart
{JC;FA} Stanford Artificial Intelligence Laboratory

⊂ABSTRACT⊃

{λ4;JUFA}	Fifteen or so PDP-10 programs for reversing the order of the
bits in a  word are presented. Although the programs were written for
their entertainment  value,   they  are reported  as material  for
someone who  might want  to prove the  correctness or  equivalence of
actual machine  level programs. The methods also illustrate the trade
off between memory space and computing time.

⊂GROUND RULES⊃

	All subroutines are coded in FAIL  with the following calling
conventions:  Accumulators 0 thru  16 need  not be save  or restored,
accumulator 17 contains a control push down list pointer named P, the
argument is provided in accumulator  1, the resulting value should be
returned in  accumulator 1.  The  macro "ACCUMULATORS"  assigns increasing
accumulator numbers to a list of names starting with 1. The contents
of undeclared variable names (such as tables of reversed bytes) need
not be mentioned when obvious.
The speed and size of each routine is measured by counting
the number of memory words of code and data required and the number of
instruction executions required.

⊂NOTES⊃

	The IBM-7090 analog of the Triple TRCE bit pair reverse technique
would be:

	IIR BITMASK	;where IIR is Invert Indicators Right.
	RFT BITMASK	;and RFT  is Right half indicators off test.
	IIR BITMASK
	RFT BITMASK
	IIR BITMASK

{Q}
⊂CONTENTS⊃{JAFA}

cycles	words		Bit Serial Methods.
 183    8   			A1 - Bit Serial using EXCH.
 165    8			A2 - Bit Serial using SKIPGE/IORI.
 108    7 			A3 - Serial method using IDPB to write.
 184   13			A4 - Serial method using ILDB to read.

			Triple TRCE Methods.
119   11			B1 - Gosper triple tricky using computed mask.
 65   24			B2 - Gosper triple tricky using 18 masks from table.
 57   15			B3 - Gosper triple tricky using  9 masks from table.
 70   11			B4 - Gosper triple tricky on 12 bytes of 3 bits.

			Byte Lookup Methods.
  2   2↑36+2 (impossible)	C1 - Table lookup 1-byte of 36-bits.
  4   2↑18+4 (impossible)	C2 - Table lookup 2-bytes of 18-bits.
  9   4107			C3 - Table lookup 3-bytes of 12-bits.
 28    269			C4 - Table lookup 4-bytes of 8-bits + 4 bits.
 25    139			C5 - Table lookup 5-bytes of 7-bits + 1 bit.
 28     72			C6 - Table lookup 6-bytes of 6-bits.
 33     43			C7 - Table lookup 7-bytes of 5-bits + 1 bit.
 40     26			C9 - Table lookup 9-bytes of 4-bits.
 52     16			C12 - Table lookup 12-bytes of 3-bits.
 76     12			C18 - Table lookup 18-bytes of 2-bits.
112      8			C36 - Table lookup 36-bytes of 1-bit.
			Recursive Byte Swapping.
 21   25			D1 - Nested Byte Swapping.
			Methods using Schroeppel's Reverse.
 40   18			E1 - using 7 bit reverse in loop.
 44   36			E2 - using 8 bit reverse in loop.
 25   52			E3 - using 8 bit reverse in line.
			JFFO Methods:
 59   80			F1 - Rubin's JFFO Method, Table Oriented.
115   16			F2 - Baumgart's JFFO Method, Computation Oriented.
{λ9;JAF1}
⊂TITLE	A1 - Bit Serial using EXCH - 183 cycles. 8 words.⊃
	ACCUMULATORS(A,B,CNT)
REV:	MOVEI	CNT,=36		;1
L:	LSHC	0,1		;36
	EXCH	A,B		;36
	LSHC	0,-1		;36
	EXCH	A,B		;36
	SOJG	CNT,L		;36
	MOVE	A,B		;1
	POPJ	P,		;1
END

⊂TITLE A2 - Bit Serial using SKIPGE/IORI - 165 cycles. 8 words. ⊃
	ACCUMULATORS(A,B,CNT)
REV:	MOVEI CNT,=36		;1
L1:	SKIPGE A		;36
	IORI B,1		;18 average
	ROT A,1			;36
	ROT B,-1		;36
	SOJG CNT,L1		;36
	MOVE A,B		;1
	POPJ P,			;1
END

⊂TITLE A3 - Bit Serial using IDPB to write - 108 cycles. 7 words.⊃
	ACCUMULATORS(A,B,PTR)
REV:	MOVE	PTR,[POINT 1,B,-1]	; 1
L:	IDPB	A,PTR			;35 average
	LSH	A,-1			;35 average
	JUMPN	A,L			;35 average
	MOVE	A,B			; 1
	POPJ	P,			; 1
END

⊂TITLE A4 - Bit Serial using ILDB to read - 184 cycles. 13 words.⊃
	ACCUMULATORS(A,B,PTR1,PTR2)
REV:	MOVE PTR1,[POINT 1,A]		;1 Source bit pointer.
	MOVE PTR2,[POINT 1,B,35]	;1 Destination bit pointer.
L1:	ILDB PTR1			;36
	DPB  PTR2			;36
	ADD  PTR2,[1B7]			;36 Decrement byte pointer.
	CAME PTR2,[POINT 1,B]		;36
	JRST L1				;36
	MOVE A,B			;1
	POPJ P,				;1
END
⊂TITLE B1 - Triple Tricky with computed mask - 119 cycles. 11 words.⊃
	ACCUMULATORS(A,MASK)
REV:	MOVE MASK,[1B0+1]	; 1
L1:	TDCE A,MASK		;18
	TDCE A,MASK		;13.5
	TDCE A,MASK		;13.5
	DPB MASK,[17,MASK,33]	;18
	LSH MASK,-1		;18
	TRZ MASK,1		;18
	JUMPN MASK,L1		;18
	POPJ	P,		; 1
END

⊂TITLE B2 - Triple tricky using 18 masks from table - 65 cycles. 24 words.⊃
	ACCUMULATORS(A,CNT)
REV:	MOVEI	CNT,=18			; 1
L:	TDCE	A,TABLE(CNT)		;18
	TDCE	A,TABLE(CNT)		;13.5
	TDCE	A,TABLE(CNT)		;13.5
	SOJG	CNT,L			;18
	POPJ	P,			; 1
TABLE: block =18
END

⊂TITLE B3 - Triple tricky using 9 masks from table - 57 cycles. 19 words.⊃
	ACCUMULATORS(A,CNT,MASK)
REV:	MOVEI	CNT,=9			; 1
L1:	TDCE	A,TABLE(CNT)		; 9
	TDCE	A,TABLE(CNT)		; 6.25
	TDCE	A,TABLE(CNT)		; 6.25
	TSCE	A,TABLE(CNT)		; 9
	TSCE	A,TABLE(CNT)		; 6.25
	TSCE	A,TABLE(CNT)		; 6.25
	SOJG	CNT,L			; 9
	MOVSS	A			; 1
	POPJ	P,			; 1
TABLE: block =9
END
⊂TITLE B4 - Triple Tricky 12-bytes of 3-bits  - 70 cycles. 11 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	MOVE PTR,[POINT 5,A,-1]		;1
	MOVEI CNT,=12			;1
L:	ILDB AA,PTR			;12	Fetch Byte from A.
	TRCE AA,5			;12
	TRCE AA,5			;9
	TRCE AA,5			;9
	LSHC AA,-3			;12	Store Byte into BB.
	SOJG CNT,L			;12	Loop.
	MOVE A,BB			;1
	POPJ P,				;1
END

⊂TITLE C1 - Impossible Table Lookup 1-Byte  of 36-bits - 2 cycles. 2↑36+2 words.⊃
REV:	MOVE A,TABLE(A)
	POPJ P,
TABLE:	BLOCK 2↑=36 		;That's almost 69 Gigawords !
END

⊂TITLE C2 - Impossible Table Lookup 2-Bytes of 18-bits -  4 cycles. 2↑18+4 words.⊃
REV:	HRR A,TABLE(A)		;1
	MOVSS A			;1
	HRR A,TABLE(A)		;1
	POPJ P,			;1
TABLE:	=262144			;Impossible size - quarter mega word.
END

⊂TITLE C3 - Table Lookup 3-bytes of 12-bits - 9 cycles. 4109 words.⊃
	ACCUMULATORS(A,X,B)
REV:	LDB X,[POINT 12,A,11]		;LEFT BYTE OF A.
	HRR B,TABLE(X)			;RIGHT BYTE OF B.
	LDB X,[POINT 12,A,35]		;RIGHT BYTE OF A.
	HLL B,TABLE(X)			;LEFT BYTE OF B.
	LDB X,[POINT 12,A,23]		;CENTER BYTE OF A.
	MOVE X,TABLE(X)
	DPB  X,[POINT 12,B,23]		;CENTER BYTE OF B.
	MOVE B,A
	POPJ P,
TABLE:	BLOCK =4096 ;each word contains 3 copies of reversed bits.
END

⊂TITLE C4 - Table lookup 4.5-bytes of 8-bits - 28 cycles. 269 words. ⊃
REV:	MOVE C,[POINT 8,A]	;1
	MOVE D,[POINT 8,B,35]	;1
L1:	ILDB AA,C		;4
	MOVE AA,TABLE(AA)	;4
	DPB AA,D		;4
	ADD D,[8B7]		;4 DECREM BYTE POINTER.
	JUMPGE D,L1		;4
	LDB AA,[POINT 4,A,35]	;1
	MOVE AA,TABLE(AA)	;1
	LSH AA,-4		;1
	DPB AA,[POINT 4,B,3]	;1
	MOVE A,B		;1
	POPJ P,			;1
TABLE:	BLOCK =256
END
⊂ TITLE C5 - Table lookup 5-bytes of 7-bits - 25 cycles. 139 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	DPB A,[POINT 1,BB,0]		;1
	MOVE PTR,[POINT 7,A,-1]		;1 Init Source pointer.
	MOVEI CNT,5			;1 Init Iteration count.
L:	ILDB	AA,PTR			;5	Fetch Byte from A.
	MOVE	AA,TABLE(AA)		;5	Reverse bits.
	LSHC	AA,-7			;5	Store Byte into BB.
	SOJG	CNT,L			;5	Loop.
	MOVE	A,BB			;1
	POPJ	P,			;1
TABLE:	block =128
END

⊂TITLE C6 - Table lookup 6-bytes of 6-bits - 28 cycles. 72 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	MOVE	PTR,[POINT 6,A,-1]	;1
	MOVEI	CNT,6			;1
L:	ILDB	AA,PTR			;6
	MOVE	AA,TABLE(AA)		;6
	LSHC	AA,-6			;6
	SOJG	CNT,L			;6
	MOVE	A,BB			;1
	POPJ	P,			;1
TABLE:	block =64
END

⊂TITLE C7 - Table lookup 7-bytes of 5-bits + 1 bit  - 33 cycles. 43 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	DPB A,[POINT 1,BB,0]		;1
	MOVE PTR,[POINT 5,A,-1]		;1
	MOVEI CNT,7			;1
L:	ILDB AA,PTR			;7	Fetch Byte from A.
	MOVE AA,TABLE(AA)		;7	Reverse bits.
	LSHC AA,-5			;7	Store Byte into BB.
	SOJG CNT,L			;7	Loop.
	MOVE A,BB			;1
	POPJ P,				;1
TABLE:	block =32
END

⊂TITLE C9 - Table lookup 9-bytes of 4-bits  - 40 cycles. 26 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	MOVE PTR,[POINT 5,A,-1]		;1
	MOVEI CNT,9			;1
L:	ILDB AA,PTR			;9	Fetch Byte from A.
	MOVE AA,TABLE(AA)		;9	Reverse bits.
	LSHC AA,-4			;9	Store Byte into BB.
	SOJG CNT,L			;9	Loop.
	MOVE A,BB			;1
	POPJ P,				;1
TABLE:	block =16
END

⊂TITLE C12 - Table lookup 12-bytes of 3-bits  - 52 cycles. 16 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	MOVE PTR,[POINT 5,A,-1]		;1
	MOVEI CNT,=12			;1
L:	ILDB AA,PTR			;12	Fetch Byte from A.
	MOVE AA,TABLE(AA)		;12	Reverse bits.
	LSHC AA,-3			;12	Store Byte into BB.
	SOJG CNT,L			;12	Loop.
	MOVE A,BB			;1
	POPJ P,				;1
TABLE:	block =8
END

⊂TITLE C18 - Table lookup 18-bytes of 2-bits  - 76 cycles. 12 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	MOVE PTR,[POINT 5,A,-1]		;1
	MOVEI CNT,=18			;1
L:	ILDB AA,PTR			;18	Fetch Byte from A.
	MOVE AA,TABLE(AA)		;18	Reverse bits.
	LSHC AA,-2			;18	Store Byte into BB.
	SOJG CNT,L			;18	Loop.
	MOVE A,BB			;1
	POPJ P,				;1
TABLE:	block =4
END

⊂TITLE C36 - "Table Lookup" 36-bytes of 1-bit  - 112 cycles. 8 words.⊃
	ACCUMULATORS(A,AA,BB,CNT,PTR)
REV:	MOVE PTR,[POINT 5,A,-1]		;1
	MOVEI CNT,=36			;1
L:	ILDB AA,PTR			;36	Fetch Byte from A.
	LSHC AA,-1			;36	Store Byte into BB.
	SOJG CNT,L			;36	Loop.
	MOVE A,BB			;1
	POPJ P,				;1
END

⊂TITLE D1 - Nested byte swapping -  21 cycles. 25 words.⊃
	ACCUMULATORS(A,B,C)
REV:	MOVS	0,A		; CCC DDD AAA BBB   AAA BBB CCC DDD
	ROTC	0,=27		; BBB AAA BBB CCC   DDD CCC DDD AAA
	HLR	A,0		; contents of A =   DDD CCC BBB AAA
	MOVE	C,A
	AND	C,[007007007007]
	LSH	C,6
	MOVE	B,A
	AND	B,[070070070070]
	IOR	C,B
	LSH	A,-6
	AND	A,[007007007007]
	IORB	A,C
	AND	C,[111111111111]
	LSH	C,2
	MOVE	B,A
	AND	B,[222222222222]
	IOR 	C,B
	LSH	A,-2
	AND	A,[111111111111]
	IOR	A,C
	POPJ	P,
END

⊂TITLE D2 - Nested byte swapping -  17 cycles. 24 words.⊃
	ACCUMULATORS(A,B,C)
REV:	MOVS	0,A		;Swap Halves.
	ROTC	0,=27		;Swap Quarters.
	HLR	A,0
	DPB A,[POINT 30,C,29]	;Swap every 1st and 3rd 3-bit byte.
	AND C,[700700700700]
	LDB B,[POINT 30,A,29]
	AND B,[007007007007]
	AND A,[070070070070]
	IOR A,B
	IORB A,C
	AND C,[555555555555]	;Swap every 1st and 3rd bit.
	DPB C,[POINT 34,B,33]
	XOR C,B
	LSH C,-2
	MULI C,5
	XOR A,C
	POPJ P,
END
⊂TITLE D3 - Nested byte swapping -  22 cycles. 32 words.⊃
	ACCUMULATORS(A,B,C)
REV:	LDB B,[POINT 24,A,23]	;Swap 1st and 3rd 12-bit bytes into B.
	DPB A,[POINT 12,B,8]
	DPB C,[POINT 30,A,29]	;Swap every 1st and 3rd 3-bit byte.
	AND A,[700700700700]
	LDB B,[POINT 30,C,29]
	AND B,[007007007007]
	AND C,[070070070070]
	IOR A,B
	IORB A,C
	AND C,[463146314631]	;Swap ever 1st and 4th bit.
	IMULI C,11
	LSH C,-3
	AND C,[042104210421]
	IMULI C,11
	XORB A,C
	ANDCM C,[463146314631]	;Swap every 2nd and 3rd bit.
	IMULI C,3
	LSH C,-1
	AND C,[104210421042]
	IMULI C,3
	XOR A,C
	POPJ P,
END
⊂ TITLE E1 - Schroeppel Reverse 5-Bytes of 7-bits - 40 cycles. 18 words.⊃
	ACCUMULATORS(A,B,C,D,AA,BB)
REV:	MOVE C,[POINT 7,A]	;1
	MOVE D,[POINT 7,B,35]	;1
L1:	ILDB AA,C		;5
	IMUL AA,[10004002001]	;5 Four copies separated by 000's base 2.
	AND AA,[210210210010]	;5 Position where bits coincide with rev.
	IDIVI AA,377		;5 Casting out 377's.
	DPB BB,D		;5
	ADD D,[7B7]		;5 Decrement  byte pointer.
	JUMPGE D,L1		;5
	DPB A,[POINT 1,B,0]	;1  odd bit to other end.
	MOVE A,B		;1
	POPJ P,			;1
END

⊂TITLE E2 - Four and a half bytes of 8 bits using a Schroeppel 8-bit reverse.⊃
; 43 CYCLES. 25 WORDS.
	ACCUMULATORS(A,B,C,D,AA,BB)
REV:	MOVE C,[POINT 8,A]	;1
	MOVE D,[POINT 8,B,35]	;1
L1:	ILDB AA,C		;4
	MUL AA,[100200401002]	;4 Five copies in A and B.
	AND BB,[20420420020]	;4 Bits coincide with rev repeated 2↑10.
	ANDI AA,41		;4
	DIVI AA,1777		;4 Casting out 1023.'s
	DPB AA,D		;4
	ADD D,[8B7]		;4
	JUMPGE D,L1		;4
	TRCE A,11		;1
	TRCE A,11		;1
	TRCE A,11		;1
	TRCE A,6		;1
	TRCE A,6		;1
	TRCE A,6		;1
	DPB A,[POINT 4,B,3]	;1
	MOVE A,B		;1
	POPJ P,			;1
END
⊂ TITLE E3 - Schroeppel 8 bit reverse in line - 44 Cycles. 36 words. ⊃
	ACCUMULATORS(A,B,C,D,AA,BB)
REV:	MOVE C,A
	LDB A,[POINT 8,C,7] ↔PUSHJ P,S8REV↔DPB A,[POINT 8,D,35]
	LDB A,[POINT 8,C,15]↔PUSHJ P,S8REV↔DPB A,[POINT 8,D,27]
	LDB A,[POINT 8,C,23]↔PUSHJ P,S8REV↔DPB A,[POINT 8,D,19]
	LDB A,[POINT 8,C,31]↔PUSHJ P,S8REV↔DPB A,[POINT 8,D,11]
	LDB A,[POINT 4,C,35]↔PUSHJ P,S8REV↔LSH A,-4↔DPB A,[POINT 4,D,3]
	MOVE A,D
	POPJ P,
S8REV:	MUL A,[100200401002]	;Five copies in A and B
	AND B,[20420420020]
	ANDI A,41
	DIVI A,1777		;Casting out 1023.'s
	POPJ P,
END

⊂TITLE F1 - Table lookup JFFO method (contributed by Jeff Rubin) - 59 cycles. 80 words. ⊃
	ACCUMULATORS{A,B,C,D}
REV:	MOVNI D,1		;1
	MOVEI C,0		;1
	JRST L2			;1
L1:	DPB D,TABLE1(B)		;18 AVERAGE
	ANDCM A,TABLE2(B)	;18 AVERAGE
L2:	JFFO A,L1		;18 AVERAGE
	MOVE A,C		;1
	POPJ P,			;1
TABLE1:	000100,,C		;=36 words of byte pointers.
	010100,,C
	020100,,C	 (etc)
TABLE2: 400000,,0		;=36 words of bit masks.
	200000,,0
	100000,,0	 (etc)
END

⊂TITLE F2 - Computation based JFFO method - 115 cycles. 16 words.⊃
REV:	MOVEI D,0		;1 Result Word.
	MOVE  C,[POINT 1,D]	;1 Pointer to a Result Bit.
	MOVE  E,[POINT 1,A]	;1 Pointer to a Result Bit.
	JRST L2			;1
L1:	DPB B,[POINT 6,C,5]	;18 Put JFFO count into P-field of C.
	DPB E,C			;18 Place ones bit into result.
	MOVNI B,-=35(B)		;18
	DPB B,[POINT 6,E,5]	;18 P-field of E ←(35-B)
	DPB C,E			;18 Place zero bit into source.
L2:	JFFO A,L1		;18+1
	MOVE A,D		;1
	POPJ P,			;1
END
⊂ TITLE E3 - Schroeppel 8 bit reverse in line - 46 Cycles. 18 words. ⊃
	ACCUMULATORS(A,B,C,D,AA,BB)
REV:	SKIPA PTR,[POINT 8,A]		;1
L1:	LSHC BB,-8			;4
L2:	ILDB AA,PTR			;5
	MUL AA,[100200401002]		;5
	AND BB,[20420420020]		;5
	ANDI AA,41			;5
	DIVI AA,1777			;5
	TLNE PTR,700000			;5
	JRST L1				;4
	TLCE PTR,001400			;2
	JRST L2				;1
	LSH BB,-4			;1
	LSHC BB,-4			;1
	MOVE A,BB			;1
	POPJ P,				;1
END